Biostat analysis

Julie DIJOUX

2024-08-28

Requirements

All analyses were performed using R version 4.2.0. The .rmd file must be located in the working directory, along with the sample_table.csv file, which corresponds to the metadata, and the 0_DADA2_output folder, which contains the output files from the DADA2 pipeline for the 16S and ITS libraries: seqtab.nochim_16S.rdata and seqtab.nochim_ITS.rdata for the abundance tables, and the taxo_tab_16S.rdata and taxo_tab_ITS.rdata files, which correspond to the taxonomic affiliations. These files are necessary to run the entire script that follows.

Packages used

# install.packages("anyLib")
# library(anyLib) # This package allows the installation of the requested packages along with all necessary dependencies.
# 
# needed.packages <- c("ade4", "Biostrings", "cluster", "FSA", "gclus", "ggplot2", "ggpubr", "graphics", "igraph", "kableExtra", "knitr", "magick", "magrittr", "MASS", "microeco", "multcompView", "plotly", "psych", "RColorBrewer", "rcompanion", "rstatix", "tidyverse", "vegan", "webshot", "dplyr", "DT", "plyr", "purrr", "venn")
# 
# anyLib(needed.packages)

Preprocessing

Creating and saving the fasta file, abundance table and taxonomic table with ASV IDs

# --- Chunk setup: reset the session and point R at the analysis directory ---
# NOTE(review): rm(list = ls()) + setwd() with an absolute path make the
# script machine-specific; an RStudio project or the {here} package would be
# more portable — TODO confirm with the authors.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation for the formatted tables/fasta written by ID.fasta()
# (file.exists() is evaluated relative to the cwd set above, which equals `path`)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "1_formatted_files"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

ID.fasta <- function(libraries) {
  # Build ASV-level outputs for one amplicon library.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  #
  # Reads the DADA2 outputs ('seqtab.nochim_<lib>.rdata' providing
  # `seqtab.nochim` and 'taxo_tab_<lib>.rdata' providing `taxotab`), joins
  # abundance and taxonomy on the raw sequence, assigns sequential ASV IDs,
  # and writes a fasta file plus abundance and taxonomy tables (RDS) into
  # '1_formatted_files/'.

  # Abundance table: DADA2 stores samples x sequences, so transpose to get
  # one row per sequence; the raw sequence becomes an explicit "seq" column.
  load(paste0("0_DADA2_output/seqtab.nochim_", libraries, ".rdata"))
  abundance <- data.frame(t(seqtab.nochim))
  abundance$seq <- row.names(abundance)

  # Taxonomy table: row names are the raw sequences as well.
  load(paste0("0_DADA2_output/taxo_tab_", libraries, ".rdata"))
  taxonomy <- data.frame(taxotab)
  taxonomy$seq <- row.names(taxonomy)

  print("Dimensions before the merger:")
  print(dim(abundance))
  print(dim(taxonomy))

  # Full outer join on the raw sequence so no ASV is lost on either side.
  merged <- merge(taxonomy, abundance, by = "seq", all.x = TRUE, all.y = TRUE)

  print("Dimensions after the merger:")
  print(dim(merged))

  # Sequential IDs such as "ASV0001_16S"; also used as row names.
  merged$ASV_ID <- sprintf(paste0("ASV%04d_", libraries), seq_along(merged$seq))
  row.names(merged) <- merged$ASV_ID

  # Fasta file: one ">ID" header line followed by the raw sequence.
  writeLines(
    paste0(">", merged$ASV_ID, "\n", merged$seq),
    paste0("1_formatted_files/ASV_seq_", libraries, ".fasta")
  )

  taxo_ranks <- c(
    "Kingdom",
    "Phylum",
    "Class",
    "Order",
    "Family",
    "Genus",
    "Species"
  )

  # Abundance table = every sample column (drop taxonomy, ID and sequence).
  abundance_out <- merged[, setdiff(names(merged), c(taxo_ranks, "ASV_ID", "seq"))]

  # Taxonomy table = the seven ranks only.
  taxonomy_out <- merged[, taxo_ranks]

  saveRDS(abundance_out,
    file = paste0("1_formatted_files/ASV_tab_all_", libraries, ".RDS")
  ) # Saving the abundance table
  saveRDS(taxonomy_out,
    file = paste0("1_formatted_files/taxo_tab_all_", libraries, ".RDS")
  ) # Saving the taxonomic table
}

# Run the formatting step for both libraries; the '##' lines below are the
# dimensions printed when the document was knitted.
ID.fasta("16S")
## [1] "Dimensions before the merger:"
## [1] 5963  165
## [1] 5963    8
## [1] "Dimensions after the merger:"
## [1] 5963  172
ID.fasta("ITS")
## [1] "Dimensions before the merger:"
## [1] 7257  165
## [1] 7257    8
## [1] "Dimensions after the merger:"
## [1] 7257  172

Correcting abundance tables according to negative controls

# --- Chunk setup: reset the session and recreate the working directory ---
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation for the control-corrected tables written by correct.tab()
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "2_corrected_files"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

correct.tab <- function(libraries) {
  # Subtract negative-control ("Tneg") signal from an abundance table.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  #
  # The table produced by ID.fasta() is split by "species" (Pg, Ps, bulk
  # soil "SNR") and "compartment" (leaves, pulps+seeds, roots, soil).  Within
  # each subtable, the per-row mean of its "Tneg" (negative control) columns
  # is subtracted from every count of that row, negatives are clamped to 0,
  # the corrected subtables are merged back together, the control columns are
  # dropped, and the result is saved to '2_corrected_files/'.
  #
  # Review fixes vs the previous version:
  #  - removed the unused library(plyr) attach;
  #  - a subtable without any "Tneg" column is now left unchanged instead of
  #    failing (mean over zero columns yielded NaN, then an NA subscript);
  #  - the final "Tneg" removal no longer drops every column when no control
  #    column is present (negative indexing with an empty vector).

  # Loading the abundance table
  ASV_tab <- readRDS(paste0("1_formatted_files/ASV_tab_all_", libraries, ".RDS"))

  # Abundance subtables for each "species" (Pg, Ps, and bulk soil (=Bs))
  Pg_tab <- subset(ASV_tab, select = grep("_Pg", names(ASV_tab)))
  Ps_tab <- subset(ASV_tab, select = grep("_Ps", names(ASV_tab)))
  Bulk_soil_df <- subset(ASV_tab, select = grep("_SNR", names(ASV_tab)))

  # Species x compartment subtables (leaves, pulps+seeds, roots, soil);
  # pulps and seeds share the same negative controls, hence the combined
  # column selection.
  groups <- list(
    Pg_Leaves_df = subset(Pg_tab, select = grep("_F", names(Pg_tab))),
    Pg_Pulps_Seeds_df = subset(Pg_tab, select = c(
      grep("ADN_P", names(Pg_tab), fixed = TRUE),
      grep("Tneg_P", names(Pg_tab), fixed = TRUE),
      grep("_G", names(Pg_tab), fixed = TRUE)
    )),
    Pg_Roots_df = subset(Pg_tab, select = grep("_R", names(Pg_tab))),
    Pg_Soil_df = subset(Pg_tab, select = grep("_S", names(Pg_tab))),
    Ps_Leaves_df = subset(Ps_tab, select = grep("_F", names(Ps_tab))),
    Ps_Pulps_Seeds_df = subset(Ps_tab, select = c(
      grep("ADN_P", names(Ps_tab), fixed = TRUE),
      grep("Tneg_P", names(Ps_tab), fixed = TRUE),
      grep("_G", names(Ps_tab), fixed = TRUE)
    )),
    Ps_Roots_df = subset(Ps_tab, select = grep("_R", names(Ps_tab))),
    Ps_Soil_df = subset(Ps_tab, select = grep("_S", names(Ps_tab))),
    Bulk_soil_df = Bulk_soil_df
  )

  # Correct each subtable against its own negative controls
  all_results <- lapply(groups, function(group) {
    tneg_idx <- grep("Tneg", colnames(group))
    if (length(tneg_idx) > 0) {
      # Per-row mean of the "Tneg" columns (zeros included), subtracted from
      # every column of that row; the mean vector recycles row-wise.
      tneg_mean <- rowMeans(group[, tneg_idx, drop = FALSE], na.rm = TRUE)
      corrected <- group - tneg_mean
      corrected[corrected < 0] <- 0 # Replacing negative values by 0
    } else {
      corrected <- group # No control column: nothing to subtract
    }
    corrected$RowNames <- rownames(group) # Keep ASV IDs for the merge below
    corrected
  })

  # Merging all corrected subtables into a single data frame on the ASV IDs
  final_result <- Reduce(
    function(left, right) merge(left, right, by = "RowNames", all = TRUE),
    all_results
  )

  # ASVs absent from a subtable come back as NA after the outer merge
  final_result[is.na(final_result)] <- 0

  # Removing "Tneg" columns and restoring ASV IDs as row names
  tneg_cols <- grep("Tneg", colnames(final_result))
  if (length(tneg_cols) > 0) final_result <- final_result[, -tneg_cols]
  row.names(final_result) <- final_result$RowNames
  final_result$RowNames <- NULL

  # Saving the corrected abundance table
  saveRDS(final_result, file = paste0("2_corrected_files/ASV_tab_all_corrected_", libraries, ".RDS"))
}

# Apply the negative-control correction to both libraries
correct.tab("16S")
correct.tab("ITS")

Removal of non-bacterial/fungal taxa

# --- Chunk setup: reset the session and recreate the working directory ---
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation for the taxonomically filtered tables from filt_taxa()
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "3_filtered_files"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

filt_taxa <- function(libraries) {
  # Remove non-bacterial (16S) / non-fungal (ITS) taxa from the corrected data.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  #
  # Builds a microeco microtable from the corrected abundance table, the
  # taxonomy table, the sample metadata and the fasta file, labels empty
  # taxonomic ranks as "Unclassified", keeps only the target kingdom, removes
  # organellar sequences (Mitochondria/Chloroplast), and saves the filtered
  # abundance table, taxonomy table and fasta into '3_filtered_files/'.

  library(microeco)
  library(magrittr)
  library(Biostrings)

  # Loading of corrected abundance table, taxonomic table, sample metadata
  # and fasta file
  ASV_tab <- readRDS(paste0("2_corrected_files/ASV_tab_all_corrected_", libraries, ".RDS"))
  taxo_tab <- readRDS(paste0("1_formatted_files/taxo_tab_all_", libraries, ".RDS"))
  taxo_tab %<>% tidy_taxonomy() # adds rank prefixes, e.g. "Bacteria" -> "k__Bacteria"
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )
  fasta <- readDNAStringSet(paste0("1_formatted_files/ASV_seq_", libraries, ".fasta"))

  # Creating a microtable object with the microeco package
  dataset <- microtable$new(
    otu_table = ASV_tab,
    tax_table = taxo_tab,
    sample_table = sample_data,
    rep_fasta = fasta
  )

  # Any rank left empty by tidy_taxonomy() (e.g. "g__") gets the suffix
  # "Unclassified" (giving "g__Unclassified").
  rank_prefixes <- c(
    Kingdom = "k", Phylum = "p", Class = "c", Order = "o",
    Family = "f", Genus = "g", Species = "s"
  )
  for (rank in names(rank_prefixes)) {
    blank <- grepl(paste0(rank_prefixes[[rank]], "__$"), dataset$tax_table[[rank]])
    dataset$tax_table[[rank]][blank] <- paste0(dataset$tax_table[[rank]][blank], "Unclassified")
  }

  print(dataset)
  dataset$tidy_dataset() # trim all components to a consistent set of samples/ASVs
  print(dataset)

  # Kingdom to keep depends on the library
  target_kingdom <- switch(libraries, "16S" = "k__Bacteria", "ITS" = "k__Fungi", NULL)
  if (!is.null(target_kingdom)) {
    # Deletion of taxa outside the target kingdom
    dataset$tax_table %<>% base::subset(Kingdom == target_kingdom)

    dataset$filter_pollution(taxa = c("Mitochondria", "Chloroplast"))
    dataset$tidy_dataset()
    print(dataset)

    dataset$tidy_dataset() # One more time to remove samples with 0 ASV
    print(dataset)
  }

  # Saving the filtered and corrected abundance table
  saveRDS(dataset$otu_table,
    file = paste0("3_filtered_files/ASV_tab_all_corrected_filt_", libraries, ".RDS")
  )
  # Saving the filtered and corrected taxonomic table
  saveRDS(dataset$tax_table,
    file = paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS")
  )
  # Saving the filtered and corrected fasta file
  writeXStringSet(dataset$rep_fasta,
    format = "fasta",
    file = paste0("3_filtered_files/ASV_seq_filt_", libraries, ".fasta")
  )
}

# Filter both libraries; the '##' lines below are the microtable summaries
# printed when the document was knitted.
filt_taxa("16S")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5821 rows and 147 columns
## tax_table have 5963 rows and 7 columns
## rep_fasta have 5963 sequences
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5821 rows and 147 columns
## tax_table have 5821 rows and 7 columns
## rep_fasta have 5821 sequences
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## rep_fasta have 5601 sequences
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## rep_fasta have 5601 sequences
filt_taxa("ITS")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 7228 rows and 146 columns
## tax_table have 7257 rows and 7 columns
## rep_fasta have 7257 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 7228 rows and 145 columns
## tax_table have 7228 rows and 7 columns
## rep_fasta have 7228 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## rep_fasta have 4625 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## rep_fasta have 4625 sequences

Normalization of abundance tables in CPM

# --- Chunk setup: reset the session and recreate the working directory ---
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation for the CPM-normalized tables from norm.CPM()
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "4_normalized_tables"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

norm.CPM <- function(libraries) {
  # Normalize a filtered abundance table to counts per million (CPM).
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  #
  # Each column (sample) is divided by its total read count and scaled to
  # 1e6; values are truncated to integer, as in the original pipeline.  The
  # result is written to '4_normalized_tables/'.

  # Loading the filtered and corrected abundance table
  counts <- readRDS(paste0("3_filtered_files/ASV_tab_all_corrected_filt_", libraries, ".RDS"))

  # Per-sample library sizes
  lib_sizes <- colSums(counts)

  # Scale each column to CPM: counts[i, j] / lib_sizes[j] * 1e6
  cpm <- sweep(counts, 2, lib_sizes, "/") * 1000000

  # Truncate to integer (as.integer drops the fractional part)
  cpm_int <- data.frame(lapply(cpm, as.integer))
  row.names(cpm_int) <- row.names(counts) # Assigning ASV IDs for row names

  # Saving the corrected, filtered and normalized abundance table
  saveRDS(cpm_int,
    file = paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS")
  )
}

# Normalize both libraries to CPM
norm.CPM("16S")
norm.CPM("ITS")

ASV distribution (truth tables)

All data

# --- Chunk setup: reset the session and recreate the working directory ---
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation for the venn tables/plots and the subset lists
# (numbering jumps from 4 to 6; folder 5 is not created in this document)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "6_venn_files"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

distrib.venn1 <- function(libraries, type) {
  # Truth table and counts of ASV distribution across Pg, Ps and bulk soil.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV"
  #
  # Writes the venn truth table (csv), a count barplot (svg) and the seven
  # subset membership lists (RDS) into '6_venn_files/', and returns a
  # DT::datatable of the truth table.

  # Loading corrected, filtered and normalized abundance table
  tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))

  # ASVs whose total abundance is > 0 over the columns matching `pattern`
  asv_present <- function(pattern) {
    sub_tab <- subset(tab, select = grep(pattern, names(tab)))
    rownames(sub_tab)[rowSums(sub_tab) > 0]
  }

  Pg_list <- asv_present("_Pg") # Pg ASV list
  Ps_list <- asv_present("_Ps") # Ps ASV list
  Bs_list <- asv_present("_SNR") # Bulk soil (=Bs) ASV list

  library(venn)

  # Truth table of subset membership, via the venn package
  x <- list(Pg = Pg_list, Ps = Ps_list, Bs = Bs_list)
  info_x <- extractInfo(x, what = c("counts", "intersections", "both"), use.names = TRUE)
  info_x <- info_x[-1, ]
  info_x <- rbind(info_x, colSums(info_x)) # Append a row with per-column sums
  rownames(info_x)[nrow(info_x)] <- "Total"

  # Saving the table to create the venn diagram later
  write.table(info_x,
    file = paste0("6_venn_files/venn_all_", type, "_", libraries, ".csv"),
    sep = ";", row.names = TRUE, col.names = NA
  )

  # ASV counts per species / bulk soil, ordered for plotting
  Species <- c("Pg (Ni-HA)", "Ps (Ni-T)", "Bulk soil")
  counts_tab <- data.frame(
    Species = factor(Species, levels = Species),
    Counts = c(
      sum(subset(info_x, Pg == 1)$counts),
      sum(subset(info_x, Ps == 1)$counts),
      sum(subset(info_x, Bs == 1)$counts)
    )
  )

  # Horizontal barplot of the counts
  library(ggplot2)
  barplot_counts <- ggplot(counts_tab, aes(x = Counts, y = Species)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = Counts, vjust = 0.3)) +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_text(size = 14)
    ) +
    xlab("Number of ASVs")

  ggsave(paste0("6_venn_files/histo_venn_all_", type, "_", libraries, ".svg"),
    plot = barplot_counts,
    width = 15,
    height = 5,
    units = "cm",
    dpi = 1200
  )

  # Membership lists of the seven venn subsets
  Pg.Ps.Bs <- intersect(intersect(Pg_list, Ps_list), Bs_list) # Subset "Pg-Ps-Bs"

  # Remove the triple intersection before computing the pairwise subsets
  Pg_rest <- setdiff(Pg_list, Pg.Ps.Bs)
  Ps_rest <- setdiff(Ps_list, Pg.Ps.Bs)
  Bs_rest <- setdiff(Bs_list, Pg.Ps.Bs)

  Pg.Ps <- intersect(Pg_rest, Ps_rest) # Subset "Pg-Ps"
  Pg.Bs <- intersect(Pg_rest, Bs_rest) # Subset "Pg-Bs"
  Ps.Bs <- intersect(Ps_rest, Bs_rest) # Subset "Ps-Bs"

  # Exclusive subsets = full list minus every shared subset
  Pg <- setdiff(Pg_list, c(Pg.Bs, Pg.Ps, Pg.Ps.Bs)) # Subset "Pg"
  Ps <- setdiff(Ps_list, c(Ps.Bs, Pg.Ps, Pg.Ps.Bs)) # Subset "Ps"
  Bs <- setdiff(Bs_list, c(Pg.Bs, Ps.Bs, Pg.Ps.Bs)) # Subset "Bs"

  # Saving list of each subset
  saveRDS(Pg, file = paste0("6_venn_files/list_ASV_Pg_subset_", libraries, ".RDS"))
  saveRDS(Pg.Bs, file = paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS"))
  saveRDS(Pg.Ps, file = paste0("6_venn_files/list_ASV_Pg-Ps_subset_", libraries, ".RDS"))
  saveRDS(Pg.Ps.Bs, file = paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS"))
  saveRDS(Ps, file = paste0("6_venn_files/list_ASV_Ps_subset_", libraries, ".RDS"))
  saveRDS(Ps.Bs, file = paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS"))
  saveRDS(Bs, file = paste0("6_venn_files/list_ASV_Bs_subset_", libraries, ".RDS"))

  # Display truth table
  library("DT")
  datatable(info_x,
    width = NULL, height = NULL,
    caption = paste0(type, " distribution (", libraries, ")")
  )
}

# Build the species-level venn tables for both libraries
distrib.venn1("16S", "ASV")
distrib.venn1("ITS", "ASV")

Specific data

# --- Chunk setup: reset the session before the "Specific data" section ---
# (no new output folder is needed: distrib.venn2() writes into 6_venn_files/)
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

distrib.venn2 <- function(libraries, type, subset) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV"
  # subset = "Pg-Ps", "Pg-Ps-Bs", "Pg", "Ps", "Pg-Bs", "Ps-Bs", "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets), "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets), "core" (corresponding to "Pg-Ps" + "Pg-Ps-Bs" subsets), or "recruited" (corresponding to "Ps-Bs" + "Pg-Bs" + "Pg-Ps-Bs" subsets)
  #
  # Builds, for the requested ASV subset, a per-compartment (seeds, pulps,
  # leaves, roots, rhizospheric soil) truth table of ASV presence, saves it as
  # csv, saves a count barplot (svg), and returns a DT::datatable.  Relies on
  # the RDS lists written by distrib.venn1(); composite subsets concatenate
  # the available lists (missing files are tolerated where handled below).

  # Loading ASV list.  For composite subsets ("Pg-spec", "Ps-spec", "core",
  # "recruited") the member lists are loaded here and concatenated further
  # down; when only one file exists, `list` is assigned directly.
  if (subset == "Pg-spec") {
    list1_path <- paste0("6_venn_files/list_ASV_Pg_subset_", libraries, ".RDS")
    list2_path <- paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS")

    if (file.exists(list1_path) && file.exists(list2_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list2_path)
    } else if (file.exists(list1_path) && !file.exists(list2_path)) {
      list <- readRDS(list1_path)
    } else if (!file.exists(list1_path) && file.exists(list2_path)) {
      list <- readRDS(list2_path)
    } else if (!file.exists(list1_path) && !file.exists(list2_path)) {
      print("No data available.")
      return()
    }
  } else if (subset == "Ps-spec") {
    list1_path <- paste0("6_venn_files/list_ASV_Ps_subset_", libraries, ".RDS")
    list2_path <- paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS")

    if (file.exists(list1_path) && file.exists(list2_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list2_path)
    } else if (file.exists(list1_path) && !file.exists(list2_path)) {
      list <- readRDS(list1_path)
    } else if (!file.exists(list1_path) && file.exists(list2_path)) {
      list <- readRDS(list2_path)
    } else if (!file.exists(list1_path) && !file.exists(list2_path)) {
      print("No data available.")
      return()
    }
  } else if (subset == "core") {
    list1_path <- paste0("6_venn_files/list_ASV_Pg-Ps_subset_", libraries, ".RDS")
    list2_path <- paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS")
    if (file.exists(list1_path) && file.exists(list2_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list2_path)
    } else if (file.exists(list1_path) && !file.exists(list2_path)) {
      list <- readRDS(list1_path)
    } else if (!file.exists(list1_path) && file.exists(list2_path)) {
      list <- readRDS(list2_path)
    } else if (!file.exists(list1_path) && !file.exists(list2_path)) {
      print("No data available.")
      return()
    }
  } else if (subset == "recruited") {
    # Three member lists; whatever pair exists is loaded into list1/list2 so
    # the combination step below can treat 2-of-3 cases uniformly.
    # NOTE(review): there is no branch for "none of the three files exists";
    # in that case `list` is never defined and the subset() call further down
    # errors — consider adding the "No data available." fallback here too.
    list1_path <- paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS")
    list2_path <- paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS")
    list3_path <- paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS")
    if (file.exists(list1_path) && file.exists(list2_path) && file.exists(list3_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list2_path)
      list3 <- readRDS(list3_path)
    } else if (file.exists(list1_path) && !file.exists(list2_path) && !file.exists(list3_path)) {
      list <- readRDS(list1_path)
    } else if (!file.exists(list1_path) && file.exists(list2_path) && !file.exists(list3_path)) {
      list <- readRDS(list2_path)
    } else if (!file.exists(list1_path) && !file.exists(list2_path) && file.exists(list3_path)) {
      list <- readRDS(list3_path)
    } else if (file.exists(list1_path) && file.exists(list2_path) && !file.exists(list3_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list2_path)
    } else if (file.exists(list1_path) && !file.exists(list2_path) && file.exists(list3_path)) {
      list1 <- readRDS(list1_path)
      list2 <- readRDS(list3_path)
    } else if (!file.exists(list1_path) && file.exists(list2_path) && file.exists(list3_path)) {
      list1 <- readRDS(list2_path)
      list2 <- readRDS(list3_path) 
    }
  }
    else {
    # Simple subsets map directly onto a single RDS file
    list_path <- paste0("6_venn_files/list_ASV_", subset, "_subset_", libraries, ".RDS")
    if (file.exists(list_path)) {
      list <- readRDS(list_path)
    } else {
      print(paste0("No ", type, " in ", subset, " subset"))
      return()
    }
  }

  # Loading ASV abundance table
  abund_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))

  # ASV obtained only in the subset called: concatenate the member lists
  # loaded above (when both/all exist, `list` was not yet assigned).
  if (subset == "Pg-spec" | subset == "Ps-spec" | subset == "core") {
    if (exists("list1") && exists("list2")) {
      list <- c(list1, list2)
    }
  } else if (subset == "recruited") {
    if (exists("list1") && exists("list2") && exists("list3")) {
      list <- c(list1, list2, list3)
    } else if (exists("list1") && exists("list2") && !exists("list3")) {
      list <- c(list1, list2)
    } else if (exists("list1") && !exists("list2") && exists("list3")) {
      list <- c(list1, list3)
    } else if (!exists("list1") && exists("list2") && exists("list3")) {
      list <- c(list2, list3)
    }
  }
  # Keep only the abundance rows belonging to the requested subset
  tab <- subset(abund_tab, rownames(abund_tab) %in% list)

  # Creation of abundance tables by compartment.
  # NOTE(review): `tab[, selected_cols]` drops to a vector when exactly one
  # column matches (dim NULL -> the compartment is discarded), while an empty
  # match keeps a 0-column data frame that survives the checks — confirm that
  # single-sample compartments are meant to be excluded from the venn.
  Leaves <- "ADN_F"
  selected_cols <- grep(Leaves, names(tab), value = TRUE)
  Leaves_df <- tab[, selected_cols]
  if (is.null(dim(Leaves_df))) {
    rm(list = "Leaves_df")
  }
  if (exists("Leaves_df")) Leaves_df <- Leaves_df[rowSums(Leaves_df) != 0, ] # Remove ASV with 0 abundance

  Pulps <- "ADN_P"
  selected_cols <- grep(Pulps, names(tab), value = TRUE)
  Pulps_df <- tab[, selected_cols]
  if (is.null(dim(Pulps_df))) {
    rm(list = "Pulps_df")
  }
  if (exists("Pulps_df")) Pulps_df <- Pulps_df[rowSums(Pulps_df) != 0, ] # Remove ASV with 0 abundance

  Seeds <- "ADN_G"
  selected_cols <- grep(Seeds, names(tab), value = TRUE)
  Seeds_df <- tab[, selected_cols]
  if (is.null(dim(Seeds_df))) {
    rm(list = "Seeds_df")
  }
  if (exists("Seeds_df")) Seeds_df <- Seeds_df[rowSums(Seeds_df) != 0, ] # Remove ASV with 0 abundance

  Roots <- "ADN_R"
  selected_cols <- grep(Roots, names(tab), value = TRUE)
  Roots_df <- tab[, selected_cols]
  if (is.null(dim(Roots_df))) {
    rm(list = "Roots_df")
  }
  if (exists("Roots_df")) Roots_df <- Roots_df[rowSums(Roots_df) != 0, ] # Remove ASV with 0 abundance

  Soil <- "ADN_S"
  selected_cols <- grep(Soil, names(tab), value = TRUE)
  Soil_df <- tab[, selected_cols]
  if (is.null(dim(Soil_df))) {
    rm(list = "Soil_df")
  }
  if (exists("Soil_df")) Soil_df <- Soil_df[rowSums(Soil_df) != 0, ] # Remove ASV with 0 abundance

  # Creating truth table from the per-compartment ASV name lists
  library(venn)
  x <- list()
  if (exists("Seeds_df")) x$Seeds <- rownames(Seeds_df)
  if (exists("Pulps_df")) x$Pulps <- rownames(Pulps_df)
  if (exists("Leaves_df")) x$Leaves <- rownames(Leaves_df)
  if (exists("Roots_df")) x$Roots <- rownames(Roots_df)
  if (exists("Soil_df")) x$Soil <- rownames(Soil_df)
  info_x <- extractInfo(x, what = c("counts", "intersections", "both"), use.names = TRUE)
  info_x <- info_x[-1, ]
  info_x <- rbind(info_x, colSums(info_x))
  rownames(info_x)[nrow(info_x)] <- "Total"

  # Saving venn table
  write.table(info_x,
    file = paste0("6_venn_files/venn_", subset, "_ASV_", libraries, ".csv"),
    sep = ";", row.names = TRUE, col.names = NA
  )

  # Counts by compartments
  # NOTE(review): assumes all five compartment columns exist in info_x; if a
  # compartment was dropped above, subset() on its missing column will error.
  Compartment <- c("Seeds", "Pulps", "Leaves", "Roots", "R. soil")
  Counts <- c(
    sum(subset(info_x, Seeds == 1)$counts),
    sum(subset(info_x, Pulps == 1)$counts),
    sum(subset(info_x, Leaves == 1)$counts),
    sum(subset(info_x, Roots == 1)$counts),
    sum(subset(info_x, Soil == 1)$counts)
  )
  counts_tab <- data.frame(Compartment, Counts)

  # Reorganization (fixed display order for the barplot)
  counts_tab$Compartment <- factor(counts_tab$Compartment,
    levels = c(
      "Seeds",
      "Pulps",
      "Leaves",
      "Roots",
      "R. soil"
    )
  )
  # Histogram: vertical bars for "core", horizontal otherwise
  library(ggplot2)
  if (subset == "core") {
    barplot_counts <- ggplot(counts_tab, aes(x = Compartment, y = Counts)) +
      geom_bar(stat = "identity") +
      geom_text(aes(label = Counts, vjust = 0.3)) +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14)
      ) +
      ylab("Number of ASVs")

    ggsave(paste0("6_venn_files/histo_venn_", subset, "_ASV_", libraries, ".svg"),
      plot = barplot_counts,
      width = 15,
      height = 5,
      units = "cm",
      dpi = 1200
    )
  } else {
    barplot_counts <- ggplot(counts_tab, aes(x = Counts, y = Compartment)) +
      geom_bar(stat = "identity") +
      geom_text(aes(label = Counts, vjust = 0.3)) +
      theme(
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 14)
      ) +
      xlab("Number of ASVs")

    ggsave(paste0("6_venn_files/histo_venn_", subset, "_ASV_", libraries, ".svg"),
      plot = barplot_counts,
      width = 15,
      height = 5,
      units = "cm",
      dpi = 1200
    )
  }

  # Display the truth tables
  library("DT")
  datatable(info_x,
    width = NULL, height = NULL,
    caption = paste0(type, " distribution in ", subset, " subset (", libraries, ")")
  )
}

# Per-compartment venn tables for the composite subsets, both libraries
distrib.venn2("16S", "ASV", "Pg-spec")
distrib.venn2("16S", "ASV", "Ps-spec")
distrib.venn2("ITS", "ASV", "Pg-spec")
distrib.venn2("ITS", "ASV", "Ps-spec")
distrib.venn2("16S", "ASV", "core")
distrib.venn2("ITS", "ASV", "core")
distrib.venn2("16S", "ASV", "recruited")
distrib.venn2("ITS", "ASV", "recruited")

Editing specific ASV abundance tables and taxonomic tables

# --- Chunk setup: reset the session and recreate the working directory ---
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation: 7_editing_figures/ and its ASV_data/ subfolder
# (note that `path` is reassigned to the subfolder's parent for the second
# dir.create call)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "7_editing_figures"
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))

path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder_1 <- "ASV_data"
if (!file.exists(newfolder_1)) dir.create(file.path(path, newfolder_1))

edit.spec.tab <- function(libraries, type, subset) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV"
  # subset = "Pg-Ps", "Pg-Ps-Bs", "Pg", "Ps", "Pg-Bs", "Ps-Bs", "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets), "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets) or "core" (corresponding to "Pg-Ps" + "Pg-Ps-Bs" subsets)

  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(ggplot2)

  # Loading sample metadata
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = T, row.names = "Sample_ID"
  )

  # Loading abundance table and taxonomic table
  asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  taxo_tab <- readRDS(paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS"))

  # Creating microtable object
  DS <- microtable$new(
    otu_table = asv_tab,
    tax_table = taxo_tab,
    sample_table = sample_data
  )

  print(DS)
  DS$tidy_dataset()
  print(DS) # Making all files consistent

  # Loading ASV list
  library(dplyr)

  if (subset == "Pg-spec") {
    distrib_path_Pg <- paste0("6_venn_files/list_ASV_Pg_subset_", libraries, ".RDS")
    distrib_path_Pg.Bs <- paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS")
    if (file.exists(distrib_path_Pg) && file.exists(distrib_path_Pg.Bs)) {
      distrib_Pg <- readRDS(distrib_path_Pg)
      distrib_Pg.Bs <- readRDS(distrib_path_Pg.Bs)
      distrib <- c(distrib_Pg, distrib_Pg.Bs)
    } else if (file.exists(distrib_path_Pg) && !file.exists(distrib_path_Pg.Bs)) {
      distrib <- readRDS(distrib_path_Pg)
    } else if (!file.exists(distrib_path_Pg) && file.exists(distrib_path_Pg.Bs)) {
      distrib <- readRDS(distrib_path_Pg.Bs)
    } else {
      print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
      return()
    }
  } else if (subset == "Ps-spec") {
    distrib_path_Ps <- paste0("6_venn_files/list_ASV_Ps_subset_", libraries, ".RDS")
    distrib_path_Ps.Bs <- paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS")
    if (file.exists(distrib_path_Ps) && file.exists(distrib_path_Ps.Bs)) {
      distrib_Ps <- readRDS(distrib_path_Ps)
      distrib_Ps.Bs <- readRDS(distrib_path_Ps.Bs)
      distrib <- c(distrib_Ps, distrib_Ps.Bs)
    } else if (file.exists(distrib_path_Ps) && !file.exists(distrib_path_Ps.Bs)) {
      distrib <- readRDS(distrib_path_Ps)
    } else if (!file.exists(distrib_path_Ps) && file.exists(distrib_path_Ps.Bs)) {
      distrib <- readRDS(distrib_path_Ps.Bs)
    } else {
      print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
      return()
    }
  } else if (subset == "core") {
    distrib_path_Pg.Ps <- paste0("6_venn_files/list_ASV_Pg-Ps_subset_", libraries, ".RDS")
    distrib_path_Pg.Ps.Bs <- paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS")
    if (file.exists(distrib_path_Pg.Ps) && file.exists(distrib_path_Pg.Ps.Bs)) {
      distrib_Pg.Ps <- readRDS(distrib_path_Pg.Ps)
      distrib_Pg.Ps.Bs <- readRDS(distrib_path_Pg.Ps.Bs)
      distrib <- c(distrib_Pg.Ps, distrib_Pg.Ps.Bs)
    } else if (file.exists(distrib_path_Pg.Ps) && !file.exists(distrib_path_Pg.Ps.Bs)) {
      distrib <- readRDS(distrib_path_Pg.Ps)
    } else if (!file.exists(distrib_path_Pg.Ps) && file.exists(distrib_path_Pg.Ps.Bs)) {
      distrib <- readRDS(distrib_path_Pg.Ps.Bs)
    } else {
      print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
      return()
    }
  }

  tab <- subset(DS$otu_table, rownames(DS$otu_table) %in% distrib)

  # Making all files consistent
  DS2 <- microtable$new(
    otu_table = tab,
    tax_table = DS$tax_table,
    sample_table = DS$sample_table
  )
  print(DS2)
  DS2$tidy_dataset()
  print(DS2)

  abund_tab <- DS2$otu_table
  tab <- DS2$tax_table

  # Saving abundance table and taxonomic table for each subset
  saveRDS(abund_tab, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", libraries, ".RDS"))
  saveRDS(tab, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", libraries, ".RDS"))

  # Subtable creations
  Leaves <- "ADN_F"
  selected_cols <- grep(Leaves, names(abund_tab), value = TRUE)
  leaves_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
  if (ncol(leaves_tab) == 0) {
    print("No available data for leaves compartment")
  } else {
    DS_leaves <- microtable$new(
      otu_table = leaves_tab,
      tax_table = DS2$tax_table
    )
    print(DS_leaves)
    DS_leaves$tidy_dataset()
    print(DS_leaves)
    saveRDS(DS_leaves$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_leaves_", libraries, ".RDS"))
    saveRDS(DS_leaves$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_leaves_", libraries, ".RDS"))
  }

  Pulps <- "ADN_P"
  selected_cols <- grep(Pulps, names(abund_tab), value = TRUE)
  pulps_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
  if (ncol(pulps_tab) == 0) {
    print("No available data for pulps compartment")
  } else {
    DS_pulps <- microtable$new(
      otu_table = pulps_tab,
      tax_table = DS2$tax_table
    )
    print(DS_pulps)
    DS_pulps$tidy_dataset()
    print(DS_pulps)
    saveRDS(DS_pulps$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_pulps_", libraries, ".RDS"))
    saveRDS(DS_pulps$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_pulps_", libraries, ".RDS"))
  }

  Seeds <- "ADN_G"
  selected_cols <- grep(Seeds, names(abund_tab), value = TRUE)
  seeds_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
  if (ncol(seeds_tab) == 0) {
    print("No available data for seeds compartment")
  } else {
    DS_seeds <- microtable$new(
      otu_table = seeds_tab,
      tax_table = DS2$tax_table
    )
    print(DS_seeds)
    DS_seeds$tidy_dataset()
    print(DS_seeds)
    saveRDS(DS_seeds$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_seeds_", libraries, ".RDS"))
    saveRDS(DS_seeds$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_seeds_", libraries, ".RDS"))
  }

  Roots <- "ADN_R"
  selected_cols <- grep(Roots, names(abund_tab), value = TRUE)
  roots_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
  if (ncol(roots_tab) == 0) {
    print("No available data for roots compartment")
  } else {
    DS_roots <- microtable$new(
      otu_table = roots_tab,
      tax_table = DS2$tax_table
    )
    print(DS_roots)
    DS_roots$tidy_dataset()
    print(DS_roots)
    saveRDS(DS_roots$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_roots_", libraries, ".RDS"))
    saveRDS(DS_roots$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_roots_", libraries, ".RDS"))
  }

  Soil <- "ADN_S"
  selected_cols <- grep(Soil, names(abund_tab), value = TRUE)
  soil_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
  if (ncol(soil_tab) == 0) {
    print("No available data for soil compartment")
  } else {
    DS_soil <- microtable$new(
      otu_table = soil_tab,
      tax_table = DS2$tax_table
    )
    print(DS_soil)
    DS_soil$tidy_dataset()
    print(DS_soil)
    saveRDS(DS_soil$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_soil_", libraries, ".RDS"))
    saveRDS(DS_soil$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_soil_", libraries, ".RDS"))
  }
}

# Build Pg-specific ASV abundance/taxonomy subtables, bacterial (16S) library
edit.spec.tab("16S", "ASV", "Pg-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 67 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 9 rows and 8 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 9 rows and 8 columns
## tax_table have 9 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 9 rows and 10 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 9 rows and 10 columns
## tax_table have 9 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 1 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 375 rows and 16 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 375 rows and 16 columns
## tax_table have 375 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 471 rows and 32 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 471 rows and 32 columns
## tax_table have 471 rows and 7 columns
# Build Ps-specific ASV abundance/taxonomy subtables, bacterial (16S) library
edit.spec.tab("16S", "ASV", "Ps-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 64 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 1 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 11 rows and 8 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 11 rows and 8 columns
## tax_table have 11 rows and 7 columns
## microtable-class object:
## sample_table have 15 rows and 2 columns
## otu_table have 307 rows and 15 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 15 rows and 2 columns
## otu_table have 307 rows and 15 columns
## tax_table have 307 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 822 rows and 32 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 822 rows and 32 columns
## tax_table have 822 rows and 7 columns
# Build Pg-specific ASV abundance/taxonomy subtables, fungal (ITS) library
edit.spec.tab("ITS", "ASV", "Pg-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 83 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 91 rows and 16 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 91 rows and 16 columns
## tax_table have 91 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 64 rows and 10 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 64 rows and 10 columns
## tax_table have 64 rows and 7 columns
## microtable-class object:
## sample_table have 9 rows and 2 columns
## otu_table have 27 rows and 9 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 9 rows and 2 columns
## otu_table have 27 rows and 9 columns
## tax_table have 27 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 241 rows and 16 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 241 rows and 16 columns
## tax_table have 241 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 680 rows and 32 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 680 rows and 32 columns
## tax_table have 680 rows and 7 columns
# Build Ps-specific ASV abundance/taxonomy subtables, fungal (ITS) library
edit.spec.tab("ITS", "ASV", "Ps-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 78 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 475 rows and 16 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 475 rows and 16 columns
## tax_table have 475 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 72 rows and 8 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 72 rows and 8 columns
## tax_table have 72 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 13 rows and 8 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 13 rows and 8 columns
## tax_table have 13 rows and 7 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 314 rows and 14 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 314 rows and 14 columns
## tax_table have 314 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 938 rows and 32 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 938 rows and 32 columns
## tax_table have 938 rows and 7 columns

Alpha diversity

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation (alpha-diversity tables and figures)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "alpha_diversity"
# Bug fix: the original tested file.exists(newfolder), which is resolved
# against the current working directory, while the folder is created under
# `path` -- so the check never saw the folder it had created. Test the full
# target path instead, and use dir.exists() since the target is a directory.
if (!dir.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder), recursive = TRUE)

a.div.index <- function(libraries, p.adj) {
  # Compute alpha-diversity indices (observed richness, Shannon, inverse
  # Simpson) per sample, test for group differences (Shapiro-Wilk normality
  # check, Kruskal-Wallis, then pairwise Wilcoxon with multiplicity
  # correction), derive compact-letter-display groups, and write:
  #   - pairwise-test tables and a mean +/- SD summary table to
  #     7_editing_figures/alpha_diversity/*.txt
  #   - a combined Cleveland-dot figure (Observed / Shannon / Invsimpson) to
  #     7_editing_figures/alpha_diversity/fig_obs_sha_invsi_<libraries>.pdf
  # Side effects only; no meaningful return value.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # p.adj = "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none"

  library(vegan)

  # Loading abundance tables and sample metadata
  asv_tab <- readRDS(file = paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  asv_tab <- as.data.frame(t(asv_tab)) # samples as rows for vegan::diversity()

  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )
  sample_data <- sample_data[, c(
    "Sample",
    "Species_Compartment",
    "Compartment_2"
  )]

  # Calculation of diversity indices
  N_effectif <- data.frame(
    Species_Compartment = names(table(sample_data$Species_Compartment)),
    N = as.vector(table(sample_data$Species_Compartment))
  )
  colnames(N_effectif)[colnames(N_effectif) == "Species_Compartment"] <- "Species and compartment"
  N_effectif$`Species and compartment` <- gsub("_", " ", N_effectif$`Species and compartment`)
  Observed <- data.frame(Sample = rownames(asv_tab), Observed = rowSums(asv_tab > 0)) # Number of observed ASVs
  Shannon <- data.frame(Sample = rownames(asv_tab), Shannon = diversity(asv_tab, index = "shannon")) # Shannon index
  Invsimpson <- data.frame(Sample = rownames(asv_tab), Invsimpson = diversity(asv_tab, index = "invsimpson")) # Invsimpson index

  library(dplyr)
  library(purrr)

  list_df <- list(Observed, Shannon, Invsimpson, sample_data)
  alpha_div <- reduce(list_df, full_join, by = "Sample") # table with all calculated indices

  # Checking data normality (Shapiro-Wilk)
  print(shapiro.test(alpha_div$Observed))
  print(shapiro.test(alpha_div$Shannon))
  print(shapiro.test(alpha_div$Invsimpson))

  # Kruskal-Wallis test
  print(kruskal.test(alpha_div$Observed ~ alpha_div$Species_Compartment))
  print(kruskal.test(alpha_div$Shannon ~ alpha_div$Species_Compartment))
  print(kruskal.test(alpha_div$Invsimpson ~ alpha_div$Species_Compartment))

  library(rcompanion)
  library(multcompView)
  library(tidyverse)
  library(ggpubr)
  library(ggplot2)
  library(rstatix)
  library(FSA)
  library(knitr)
  library(kableExtra)

  # Computes statistics (Wilcoxon Test)
  # Observed
  stat.observed <- alpha_div %>%
    wilcox_test(Observed ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.observed$Comparison <- paste(stat.observed$group1, stat.observed$group2, sep = "-")
  write.table(stat.observed,
    file = paste0("7_editing_figures/alpha_diversity/stat.observed_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Observed ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Observed ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.observed <- stat.observed[order(stat.observed$p.adj, decreasing = FALSE), ] # Organisation by descending order
  CLD <- cldList(p.adj ~ Comparison, data = stat.observed) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 0)
  merge_tab$SD <- round(merge_tab$SD, 0)
  merge_tab$Observed <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Compartment_2 <- ifelse(grepl("Leaves|Pulps|Seeds", merge_tab$"Species and compartment"), "Aerial compartments", "Underground compartments")
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)

  # Choose the font (Windows-only device registration -- adjust on other OSes)
  library(extrafont)
  #font_import(paths = "C:/Windows/Fonts", prompt = FALSE) # Import available fonts (do this only once)
  loadfonts(device = "win")  # For Windows
  loadfonts(device = "pdf")  # For export PDF graphics

  # Plotting cleveland diagram to visualize the data distribution
  rhizo_tab <- subset(merge_tab, Compartment_2 == "Underground compartments")
  rhizo_tab$`Species and compartment` <- factor(rhizo_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots"
    )
  )
  aerial_tab <- subset(merge_tab, Compartment_2 == "Aerial compartments")
  aerial_tab$`Species and compartment` <- factor(aerial_tab$`Species and compartment`,
    levels = c(
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )

  rhizo_obs <- ggplot(rhizo_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Observed index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))

  aerial_obs <- ggplot(aerial_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Observed index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))

  Observed <- merge_tab[, c("Species and compartment", "Observed")]

  # Shannon
  stat.shannon <- alpha_div %>%
    wilcox_test(Shannon ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.shannon$Comparison <- paste(stat.shannon$group1, stat.shannon$group2, sep = "-")
  write.table(stat.shannon,
    file = paste0("7_editing_figures/alpha_diversity/stat.shannon_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Shannon ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Shannon ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.shannon <- stat.shannon[order(stat.shannon$p.adj, decreasing = FALSE), ] # Organisation by descending order
  CLD <- cldList(p.adj ~ Comparison, data = stat.shannon) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 2)
  merge_tab$SD <- round(merge_tab$SD, 2)
  merge_tab$Shannon <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)

  # Plotting cleveland diagram to visualize the data distribution
  merge_tab$`Species and compartment` <- factor(merge_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots",
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )

  distrib_sha <- ggplot(merge_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Shannon index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))

  Shannon <- merge_tab[, c("Species and compartment", "Shannon")]

  # Invsimpson
  stat.invsimpson <- alpha_div %>%
    wilcox_test(Invsimpson ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.invsimpson$Comparison <- paste(stat.invsimpson$group1, stat.invsimpson$group2, sep = "-")
  write.table(stat.invsimpson,
    file = paste0("7_editing_figures/alpha_diversity/stat.invsimpson_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Invsimpson ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Invsimpson ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.invsimpson <- stat.invsimpson[order(stat.invsimpson$p.adj, decreasing = FALSE), ] # Organisation by descending order
  CLD <- cldList(p.adj ~ Comparison, data = stat.invsimpson) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 2)
  merge_tab$SD <- round(merge_tab$SD, 2)
  merge_tab$Invsimpson <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)

  # Plotting cleveland diagram to visualize the data distribution
  merge_tab$`Species and compartment` <- factor(merge_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots",
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )

  distrib_invsi <- ggplot(merge_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      axis.text.y = element_blank()
    ) +
    xlab("Invsimpson index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))

  Invsimpson <- merge_tab[, c("Species and compartment", "Invsimpson")]

  # Editing final table with all results
  list_df <- list(N_effectif, Observed, Shannon, Invsimpson)
  alpha_vf <- reduce(list_df, full_join, by = "Species and compartment")

  order <- c("Pg Seeds", "Ps Seeds", "Pg Pulps", "Ps Pulps", "Pg Leaves", "Ps Leaves", "Pg Roots", "Ps Roots", "Pg Soil", "Ps Soil", "Bulk Soil")
  alpha_vf <- alpha_vf[match(order, alpha_vf$"Species and compartment"), ]

  write.table(alpha_vf, file = paste0("7_editing_figures/alpha_diversity/alpha_div_", libraries, ".txt"), sep = ";", row.names = FALSE)

  # Display table of results
  library("DT")
  if (libraries == "16S") {
    title <- "Alpha diversity indices (bacterial communities)"
  } else {
    title <- "Alpha diversity indices (fungal communities)"
  }

  print(datatable(alpha_vf, width = NULL, height = NULL, rownames = FALSE, caption = title))

  # Editing final plot
  index_1 <- ggarrange(aerial_obs, rhizo_obs,
    nrow = 2, ncol = 1
  )
  index_2 <- ggarrange(index_1, distrib_sha, distrib_invsi,
    nrow = 1, ncol = 3
  )

  if (libraries == "16S") {
    title <- "(a) Distribution of alpha diversity indices (bacterial communities)"
  } else {
    title <- "(b) Distribution of alpha diversity indices (fungal communities)"
  }

  # annotate_figure(index_2, top = text_grob(title, color = "black", face = "bold", size = 12))

  # Save figure
  # Bug fix: the original used plot = last_plot(), but the ggarrange composite
  # is never printed inside this function, so last_plot() would return the last
  # bare ggplot (distrib_invsi) -- or stale state -- instead of the assembled
  # three-panel figure. Save the composite explicitly.
  ggsave(paste0("7_editing_figures/alpha_diversity/fig_obs_sha_invsi_", libraries, ".pdf"),
    plot = index_2,
    width = 15,
    height = 7.5,
    units = "cm",
    dpi = 800
  )
  # annotate_figure(index_2, top = text_grob(title, color = "black", face = "bold", size = 12))
}

# Alpha diversity of bacterial (16S) communities, FDR-adjusted pairwise tests
a.div.index("16S", "fdr")
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Observed
## W = 0.85926, p-value = 8.052e-10
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Shannon
## W = 0.82451, p-value = 3.296e-11
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Invsimpson
## W = 0.69735, p-value = 4.37e-15
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Observed by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 115.24, df = 10, p-value < 2.2e-16
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Shannon by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 123.49, df = 10, p-value < 2.2e-16
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Invsimpson by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 126.31, df = 10, p-value < 2.2e-16
# Alpha diversity of fungal (ITS) communities, FDR-adjusted pairwise tests
a.div.index("ITS", "fdr")
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Observed
## W = 0.92619, p-value = 7.91e-07
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Shannon
## W = 0.89834, p-value = 1.645e-08
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  alpha_div$Invsimpson
## W = 0.76572, p-value = 6.102e-14
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Observed by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 139.8, df = 10, p-value < 2.2e-16
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Shannon by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 136.04, df = 10, p-value < 2.2e-16
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  alpha_div$Invsimpson by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 134.64, df = 10, p-value < 2.2e-16

Beta diversity

MDS

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation (beta-diversity matrices and figures)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "beta_diversity"
# Bug fix: the original tested file.exists(newfolder), which is resolved
# against the current working directory, while the folder is created under
# `path` -- so the check never saw the folder it had created. Test the full
# target path instead, and use dir.exists() since the target is a directory.
if (!dir.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder), recursive = TRUE)

# Choice of colours, shared by the MDS plots below (referenced by name in
# all_MDS(), so c1..c11 must keep these names). Order matches the factor
# levels: Pg compartments, Ps compartments, then bulk soil.
c1 <- "#24445C" # Seeds Pg
c2 <- "#800080" # Pulps Pg
c3 <- "#85C17E" # Leaves Pg
c4 <- "#DAB30A" # Roots Pg
c5 <- "#5B3C11" # Soil Pg

c6 <- "#8EA2C6" # Seeds Ps
c7 <- "#C71585" # Pulps Ps
c8 <- "#B0F2B6" # Leaves Ps
c9 <- "#E7F00D" # Roots Ps
c10 <- "#8B6C42" # Soil Ps

c11 <- "black" # Bulk Soil

all_MDS <- function(libraries, method, nk) {
  # Compute a between-sample dissimilarity matrix, run metric MDS (PCoA) on it,
  # and return an interactive 3D plotly scatter of the first three axes,
  # coloured by species/compartment (colours c1..c11 defined at file level).
  # The dissimilarity matrix is also written to
  # 7_editing_figures/beta_diversity/matrix_<method>_<libraries>.csv.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # method = "morisita", "manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord", "hellinger", "aitchison", or "robust.aitchison"
  # nk = maximum dimension of the space which the data are to be represented in
  #      (must be >= 3, since axes 1-3 are plotted)

  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(vegan)
  library(plotly)
  library(webshot)
  library(magick)

  # Loading abundance table and sample metadata
  asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )

  DS <- microtable$new(
    otu_table = asv_tab,
    sample_table = sample_data
  )
  print(DS)
  DS$tidy_dataset() # Making all files consistent
  print(DS)

  sample_info <- DS$sample_table
  sample_info$Species_Compartment <- gsub("_", " ", sample_info$Species_Compartment)
  asvTab <- data.frame(t(DS$otu_table)) # Samples as rows

  # Calculating the dissimilarity matrix (named generically: it holds whatever
  # index `method` selects, not necessarily Morisita)
  dist_mat <- as.matrix(vegdist(asvTab, method = method))
  write.table(dist_mat,
    file = paste0("7_editing_figures/beta_diversity/matrix_", method, "_", libraries, ".csv"),
    sep = ";",
    col.names = NA
  )

  # MDS (principal coordinates analysis)
  prin_comp <- cmdscale(dist_mat, k = nk, eig = TRUE)

  # Calculate the proportion of variance explained by each component
  # NOTE(review): with non-Euclidean dissimilarities cmdscale can produce
  # negative eigenvalues, which slightly distort these percentages -- confirm
  # whether a positive-eigenvalue correction is wanted.
  explained_variance_ratio <- prin_comp$eig / sum(prin_comp$eig) * 100
  explained_variance_ratio <- round(explained_variance_ratio, 1)

  # Creation of the dataframe with the principal components for the final figure
  components <- data.frame(
    Axis1 = prin_comp$points[, 1],
    Axis2 = prin_comp$points[, 2],
    Axis3 = prin_comp$points[, 3]
  )

  # Adding information to the "components" dataframe
  components$Species_Compartment <- sample_info$Species_Compartment

  # Reversing the axes (sign is arbitrary in MDS; flipped for display only)
  components$Axis1 <- -components$Axis1
  components$Axis2 <- -components$Axis2
  components$Axis3 <- -components$Axis3

  # Create an interactive 3D point cloud plot
  components$Species_Compartment <- factor(components$Species_Compartment,
    levels = c(
      "Pg Seeds", "Pg Pulps", "Pg Leaves", "Pg Roots", "Pg Soil",
      "Ps Seeds", "Ps Pulps", "Ps Leaves", "Ps Roots", "Ps Soil",
      "Bulk Soil"
    )
  )
  if (libraries == "16S") {
    title <- "(a) MDS analysis of bacterial samples"
  } else if (libraries == "ITS") {
    title <- "(b) MDS analysis of fungal samples"
  }


  fig_species_compartment <- plot_ly(components,
    x = ~Axis1,
    y = ~Axis2, z = ~Axis3,
    color = ~Species_Compartment,
    colors = c(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11),
    type = "scatter3d", mode = "markers",
    marker = list(size = 7)
  ) %>%
    layout(
      title = title,
      scene = list(
        xaxis = list(title = paste("Axis 1 (", toString(explained_variance_ratio[1]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12)),
        yaxis = list(title = paste("Axis 2 (", toString(explained_variance_ratio[2]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12)),
        zaxis = list(title = paste("Axis 3 (", toString(explained_variance_ratio[3]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12))
      ),
      legend = list(title = list(text = "Species and compartment"))
    )

  # Return the plotly figure (auto-printed when called at top level in knitr)
  fig_species_compartment
}

# Run the interactive 3-D MDS for both libraries (vegdist method "morisita",
# 3 axes). The "##" lines are the console output captured when the report was
# knitted; the second microtable print shows the dataset after tidy_dataset().
all_MDS("16S", "morisita", 3)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
all_MDS("ITS", "morisita", 3)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns

Hierarchical Clustering (HC)

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "beta_diversity"
# Check for the folder inside `path`: the previous test checked `newfolder`
# relative to the working directory, not where dir.create() creates it.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))

dend.cah <- function(libraries, method, distance, clusters) {
  # Hierarchical clustering (HC) of the normalized samples. Side effects:
  #  (1) saves a PNG "merge levels" (fusion) plot used to choose the number
  #      of clusters, and
  #  (2) saves a PDF dendrogram with k cluster boxes, manually placed cluster
  #      labels, and a dashed line at the cut height.
  #
  # libraries =  "16S" (for bacterial library) or "ITS" (for fungal library)
  # method = "morisita", "manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord", "hellinger", "aitchison", or "robust.aitchison"
  # distance = "ward.D2", "ward.D", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC)
  #   (note: despite its name, this argument is the hclust() agglomeration method)
  # clusters = number of clusters

  library(ade4)
  library(vegan)
  library(gclus)
  library(cluster)
  library(RColorBrewer)
  library(MASS)
  library(extrafont)
  loadfonts(device = "pdf")

  # Loading abundance table (transposed so samples are rows, as vegdist expects)
  asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  asv_tab <- t(asv_tab)
  asv_tab <- as.data.frame(asv_tab)

  # Distance matrix
  matrix.d <- vegdist(asv_tab, method = method)
  # Classification (agglomerative clustering with the chosen linkage)
  matrix.cw <- hclust(matrix.d, distance)
  # Group reorganisation: reorder leaves so similar samples sit next to each other
  matrix.cwo <- reorder.hclust(matrix.cw, matrix.d)

  # Merging levels graph: number of clusters (k) as a function of node height,
  # used to decide where to cut the tree
  if (libraries == "16S") {
    title <- "(a) Merge fusion (bacterial samples)"
  } else {
    title <- "(b) Merge fusion (fungal samples)"
  }

  png(filename = paste0("7_editing_figures/beta_diversity/merge_levels_", libraries,".png"),
      width = 15, height = 15,
      units = "cm", res = 600)
  plot(sort(matrix.cw$height), nrow(asv_tab):2,
       type="S",
       main= title,
       ylab="k (number of clusters)",
       xlab="h (node height)", col="grey",
       cex.main = 1,
       cex.axis = 0.75)

  # Annotate each merge step with the corresponding k value
  text(sort(matrix.cw $height), nrow(asv_tab):2, nrow(asv_tab):2,
       col="red", cex=0.8)
  dev.off()

  # Plotting the dendrogram and k classes
  #   if (libraries == "16S") {
  #   title = "Dendrogram of Hierarchical Clustering of Samples Based on ASV Composition (bacterial samples)"
  # } else {
  #   title = "Dendrogram of Hierarchical Clustering of Samples Based on ASV Composition (fungal samples)"
  # }

  pdf(
    file = paste0("7_editing_figures/beta_diversity/dendrogram_", method, "_", clusters, "_", libraries, ".pdf"),
    width = 8.8, height = 6.6
  )
  # Part 1: drawing the dendrogram and rectangles
  plot(matrix.cwo,
    hang = -1, cex = 0.25, cex.axis = 0.75,
    ylab = "Distance", xlab = paste(nrow(asv_tab), " samples"),
    main = NULL,
    sub = paste(clusters, " clusters (k)")
  )
  rect.hclust(matrix.cwo, k = clusters, border = 1:clusters)

  # Part 2: adding labels to rectangles.
  # NOTE(review): the x/y coordinates below were tuned by hand for this
  # dataset's leaf ordering — they must be revisited if the data change.
  if (libraries == "16S") {
    text(
      x = 17, y = -0.7,
      labels = "k1_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 49, y = -0.7,
      labels = "k2_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 72, y = -0.7,
      labels = "k3_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 88, y = -0.7,
      labels = "k4_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 100.5, y = -0.7,
      labels = "k5_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 109, y = -0.7,
      labels = "k6_B", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 114, y = -0.7,
      labels = "k7_B", pos = 3, font = 2, cex = 0.25
    )
    text(
      x = 123.5, y = -0.7,
      labels = "k8_B", pos = 3, font = 2, cex = 0.5
    )

    # Part 3: add the horizontal line and its label (cut at the height of the
    # third-from-last merge, i.e. the boundary between 3 and 4 clusters)
    abline(h = matrix.cwo$height[length(matrix.cwo$height) - 2], lty = 2, col = "red", lwd = 1)
    text(x = 1.8, y = round(matrix.cwo$height[length(matrix.cwo$height) - 2], 0)-0.05, labels = paste0("distance = ", round(matrix.cwo$height[length(matrix.cwo$height) - 2], 2)), pos = 3, col = "red", cex = 0.5)
  } else if (libraries == "ITS") {
    text(
      x = 8, y = -0.64,
      labels = "k1_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 25, y = -0.64,
      labels = "k2_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 41, y = -0.64,
      labels = "k3_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 56, y = -0.64,
      labels = "k4_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 71, y = -0.64,
      labels = "k5_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 82, y = -0.64,
      labels = "k6_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 90, y = -0.64,
      labels = "k7_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 102.5, y = -0.64,
      labels = "k8_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 120, y = -0.64,
      labels = "k9_F", pos = 3, font = 2, cex = 0.5
    )
    text(
      x = 137.5, y = -0.64,
      labels = "k10_F", pos = 3, font = 2, cex = 0.5
    )

    # Part 3: add the horizontal line and its label
    abline(h = matrix.cwo$height[length(matrix.cwo$height) - 2], lty = 2, col = "red", lwd = 1)
    text(x = 1.3, y = round(matrix.cwo$height[length(matrix.cwo$height) - 2], 0)-0.05, labels = paste0("distance = ", round(matrix.cwo$height[length(matrix.cwo$height) - 2], 1)), pos = 3, col = "red", cex = 0.5)
  }
  dev.off()
}

# Build the dendrograms: 8 clusters for bacteria (16S), 10 for fungi (ITS).
# The "## png / ## 2" lines are dev.off()'s return value in the knit output.
dend.cah("16S", "morisita", "ward.D2", 8)
## png 
##   2
dend.cah("ITS", "morisita", "ward.D2", 10)
## png 
##   2

Relative abundances (barplot)

Figures

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "barplots"
# Check for the folder inside `path`: the previous test checked `newfolder`
# relative to the working directory, not where dir.create() creates it.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))

barplot.fig <- function(libraries, subset, type, level, nb) {
  # Relative-abundance barplot at a given taxonomic level, averaged per
  # Species_Compartment group and faceted by Compartment x Species.
  # Side effects (all under 7_editing_figures/barplots/): saves the
  # per-replicate abundances (RDS), the group mean/sd table (txt) and the
  # figure (pdf); returns the ggplot object.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # subset = "all.sp_all.asv"
  # type = "ASV"
  # level = "Phylum", "Class", "Family", "Order", "Genus" or "Species"
  # nb = number of taxa in legend

  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(ggplot2)

  # Loading tables
  abund_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_", subset, "_", libraries, ".RDS"))
  tab <- readRDS(paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS"))
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = T, row.names = "Sample_ID"
  )

  DS <- microtable$new(
    otu_table = abund_tab,
    tax_table = tab,
    sample_table = sample_data
  )
  print(DS)
  # tidy_dataset() keeps only samples/taxa present in all three tables
  DS$tidy_dataset()
  print(DS)

  # Calculating relative abundance
  # t1: abundances averaged per Species_Compartment group (used for the plot)
  t1 <- trans_abund$new(
    dataset = DS,
    taxrank = level,
    ntaxa = nb,
    groupmean = "Species_Compartment"
  )

  # t2: per-replicate abundances, saved for the statistics step. The
  # "row_data_abund" file name (presumably "raw") is read back by get.stats(),
  # so it must not be renamed here alone.
  t2 <- trans_abund$new(
    dataset = DS,
    taxrank = level,
    ntaxa = nb
  )
  saveRDS(t2$data_abund, file = paste0("7_editing_figures/barplots/row_data_abund_", subset, "_subset_", level, "_", libraries, ".RDS"))

  # Saving data abundance table
  write.table(t1$data_abund, file = paste0("7_editing_figures/barplots/mean_sd_abund_", subset, "_subset_", level, "_", libraries, ".txt"), sep = "\t", col.names = NA)

  library(dplyr)

  # Reorganising columns
  # Species parsed from the grouped sample name: Pg / Ps, otherwise "Bs" (bulk soil)
  t1$data_abund <- t1$data_abund %>%
    mutate(Species = ifelse(grepl("Pg", Sample), "Pg",
      ifelse(grepl("Ps", Sample), "Ps", "Bs")
    ))

  # Compartment parsed from the grouped sample name; anything unmatched is "Soil"
  t1$data_abund <- t1$data_abund %>%
    mutate(Compartment = ifelse(grepl("Leaves", Sample), "Leaves",
      ifelse(grepl("Pulps", Sample), "Pulps",
        ifelse(grepl("Seeds", Sample), "Seeds",
          ifelse(grepl("Roots", Sample), "Roots", "Soil")
        )
      )
    ))
  t1$data_abund$Compartment <- factor(t1$data_abund$Compartment, levels = c("Seeds", "Pulps", "Leaves", "Roots", "Soil"))

  # Plotting relative abundances
  fig <- t1$plot_bar(
    others_color = "grey90",
    facet = c("Compartment", "Species"),
    barwidth = 3,
    xtext_keep = FALSE,
    strip_text = 12,
    legend_text_italic = TRUE,
    ytitle_size = 12
  ) +
    theme(
      text = element_text(family = "serif", size = 12),
      axis.text = element_text(size = 10),
      legend.key.height = unit(0.75, "lines"),
      legend.key.width = unit(0.75, "lines"),
      legend.position = "right",
      plot.margin = margin(t = 0)
    ) +
    guides(fill = guide_legend(title = level, ncol = 1))

  ggsave(paste0("7_editing_figures/barplots/plot_", subset, "_", level, "_subset_", libraries, ".pdf"),
    plot = fig,
    device = "pdf",
    width = 170,
    height = 113,
    units = "mm",
    dpi = 1200
  )
  fig
}

# Barplots at Phylum (top 10), Class (top 20) and Genus (top 20) level for
# both libraries. The "##" lines are the knitted console output.
barplot.fig("16S", "all.sp_all.asv", "ASV", "Phylum", 10)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns

barplot.fig("ITS", "all.sp_all.asv", "ASV", "Phylum", 10)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns

barplot.fig("16S", "all.sp_all.asv", "ASV", "Class", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns

barplot.fig("ITS", "all.sp_all.asv", "ASV", "Class", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns

barplot.fig("16S", "all.sp_all.asv", "ASV", "Genus", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns

barplot.fig("ITS", "all.sp_all.asv", "ASV", "Genus", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns

Statistics

# Reset the session before the statistics step (no output folder is created
# here: results are written into the existing barplots folder)
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Get stats for relative abundances
# Get stats for relative abundances
get.stats <- function(libraries, type, subset, level) {
  # Pairwise Wilcoxon tests (FDR-adjusted) on the per-replicate relative
  # abundances produced by barplot.fig(), comparing taxon x Species_Compartment
  # groups. Writes the result table under 7_editing_figures/barplots/.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV" (kept for call-site symmetry; not read in the body)
  # subset = "all.sp_all.asv"
  # level = "Phylum", "Class", "Family", "Order", "Genus" or "Species"

  library(vegan)
  library(rstatix)

  # Per-replicate abundances saved by barplot.fig()
  abund_tab <- readRDS(paste0("7_editing_figures/barplots/row_data_abund_", subset, "_subset_", level, "_", libraries, ".RDS"))

  # Remove taxa with an abundance of 0
  abund <- abund_tab[abund_tab$Abundance > 0, ]
  abund$ID <- paste0(abund$Taxonomy, "_", abund$Species_Compartment)

  # Remove "taxa" with only 1 or 2 replicates to perform the statistical tests
  # (vectorized with table() instead of growing a vector inside a loop)
  group_sizes <- table(abund$ID)
  data_keep <- names(group_sizes)[group_sizes >= 3]
  abund_filt <- abund[abund$ID %in% data_keep, ]

  # Check the normality of data
  # NOTE(review): shapiro.test() requires 3 <= n <= 5000 observations —
  # confirm the filtered table stays in that range for every level used.
  test_norm <- shapiro.test(abund_filt$Abundance)

  # Get statistics
  if (test_norm$p.value <= 0.05) {
    # Non-normal data: pairwise Wilcoxon tests with FDR correction
    stats_abund <- abund_filt %>%
      wilcox_test(Abundance ~ ID, p.adjust.method = "fdr") %>%
      add_significance()
  } else {
    print("Data follows the normality, do an ANOVA.")
    return()
  }

  # Saving table
  write.table(stats_abund,
    file = paste0("7_editing_figures/barplots/stats_abund_", subset, "_subset_", level, "_", libraries, ".txt"),
    sep = "\t",
    col.names = NA
  )
}

# Class-level statistics for both libraries
get.stats("16S", "ASV", "all.sp_all.asv", "Class")
get.stats("ITS", "ASV", "all.sp_all.asv", "Class")

Biomarkers (LEfSe analysis)

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "LEfSe"
# Check for the folder inside `path`: the previous test checked `newfolder`
# relative to the working directory, not where dir.create() creates it.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))

lefse.analysis <- function(libraries, subset, type, level) {
  # LEfSe biomarker analysis (Kruskal-Wallis + LDA, via microeco::trans_diff)
  # between compartments for one host-specific subset. Saves and displays the
  # significant taxa; skips cleanly when the input is missing/degenerate or
  # when LEfSe finds no significant feature.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # type = "ASV"
  # level = "Phylum", "Class", "Family", "Order", "Genus" or "Species"

  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(ggplot2)

  tryCatch(
    {
      # Loading tables
      path <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", libraries, ".RDS")
      if (file.exists(path)) {
        abund_tab <- readRDS(path)
      } else {
        print("No available data. Skip the LEfSe analysis.")
        return()
      }

      tab <- readRDS(paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", libraries, ".RDS"))
      if (length(tab) == 1) {
        print("Not enough data. Skip the LEfSe analysis.")
        return()
      }
      sample_data <- read.csv("sample_table.csv",
        sep = ";",
        header = T, row.names = "Sample_ID"
      )

      DS <- microtable$new(
        otu_table = abund_tab,
        tax_table = tab,
        sample_table = sample_data
      )
      print(DS)
      DS$tidy_dataset()
      print(DS)

      # LEfSe analysis
      t1 <- trans_diff$new(
        dataset = DS,
        method = "lefse",
        group = "Compartment",
        alpha = 0.05,
        lefse_subgroup = NULL,
        taxa_level = level # paste0() on a single string was a no-op
      )

      # Saving LEfSe results
      saveRDS(t1$res_diff, file = paste0("7_editing_figures/LEfSe/lefse_", subset, "_subset_", level, "_", libraries, ".RDS"))
      write.table(t1$res_diff,
        file = paste0("7_editing_figures/LEfSe/lefse_", subset, "_subset_", level, "_", libraries, ".txt"),
        sep = "\t", col.names = NA
      )

      # Display LEfSe results
      library(DT)
      print(datatable(t1$res_diff,
        width = NULL, height = NULL, rownames = FALSE,
        caption = paste0(libraries, " ", subset, " ", type, " ", level)
      ))
    },
    error = function(e) {
      # An error handler only ever receives error conditions, so the former
      # inherits(e, "error") guards were redundant and have been dropped.
      if (grepl("No significant feature found!", e$message)) {
        cat("No significant taxa. Skip the LEfSe analysis.\n")
        return()
      } else if (grepl("all observations are in the same group", e$message)) {
        cat("All observations are in the same group. Skip the LEfSe analysis.\n")
        return()
      } else {
        stop(e)
      }
    }
  )
}

# Genus-level LEfSe for each host-specific subset; "##" lines are knit output.
lefse.analysis("16S", "Pg-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 67 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 198 input features ...
## 198 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 65 taxa found significant ...
## After P value adjustment, 47 taxa found significant ...
## Minimum LDA score: 3.99588516519995 maximum LDA score: 5.72372200421366
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
lefse.analysis("16S", "Ps-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 64 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 240 input features ...
## 240 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 92 taxa found significant ...
## After P value adjustment, 76 taxa found significant ...
## Minimum LDA score: 3.64565560661862 maximum LDA score: 5.65051237769098
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...

lefse.analysis("ITS", "Pg-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 83 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 307 input features ...
## 307 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 94 taxa found significant ...
## After P value adjustment, 81 taxa found significant ...
## Minimum LDA score: 3.09885276866572 maximum LDA score: 5.64114530747946
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
lefse.analysis("ITS", "Ps-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 78 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 434 input features ...
## 434 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 163 taxa found significant ...
## After P value adjustment, 139 taxa found significant ...
## Minimum LDA score: 2.76808901753798 maximum LDA score: 5.64156415348689
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...

Co-occurrence networks

This part of the script is an adaptation of the script by Cheng Gao used in the following article published in Nature Communications in 2022: Gao, Cheng, et al. « Co-Occurrence Networks Reveal More Complexity than Community Composition in Resistance and Resilience of Microbial Communities ». Nature Communications, vol. 13, no. 1, July 2022, p. 3867. DOI.org (Crossref), https://doi.org/10.1038/s41467-022-31343-y.

Github link: https://github.com/ChengGaoBerkeley/EPICON.FunBac/blob/main/Fig.3.Fig.S4.Fig.S5.Fig.S6.Fig.S7.Fig.S8.Fig.S9.NatCom.Spman.network.2022.05.31.Rmd

Creation of BF (Bacteria and Fungi) subtables

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "co-occurence_network"
# Check for the folder inside `path` (the previous test checked `newfolder`
# relative to the working directory), consistent with the check below.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))

path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "BF_tables"
if (!file.exists(file.path(path, newfolder))) {
  dir.create(file.path(path, newfolder))
}

BF.tab <- function(subset, compartment, type) {
  # Merge the bacterial (16S) and fungal (ITS) tables of one compartment into
  # combined "BF" abundance and taxonomy tables restricted to the shared
  # samples, saved under 7_editing_figures/co-occurence_network/BF_tables/.
  #
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"

  library(dplyr)

  # Creating BF abundance tables
  path_16S <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", compartment, "_16S.RDS")
  if (file.exists(path_16S)) tab_16S <- readRDS(file = path_16S)
  path_ITS <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", compartment, "_ITS.RDS")
  if (file.exists(path_ITS)) tab_ITS <- readRDS(file = path_ITS)

  # inherits = FALSE restricts exists() to this function's frame so a stale
  # object in the global environment cannot mask a missing file. The
  # both-missing test must run first: in the original order it was
  # unreachable, because the one-missing (||) test also caught that case.
  if (!exists("tab_16S", inherits = FALSE) && !exists("tab_ITS", inherits = FALSE)) {
    print("Dataframes does not exist. Skip the co-occurence network analysis.")
    return()
  }
  if (!exists("tab_16S", inherits = FALSE) || !exists("tab_ITS", inherits = FALSE)) {
    print("One of the dataframe does not exist. Skip the co-occurence network analysis.")
    return()
  }

  # Keep only samples in common (intersection computed once, applied to both)
  common_samples <- intersect(names(tab_16S), names(tab_ITS))
  tab_16S <- dplyr::select(tab_16S, all_of(common_samples))
  tab_ITS <- dplyr::select(tab_ITS, all_of(common_samples))

  BF_abund_tab <- bind_rows(tab_16S, tab_ITS)

  if (ncol(BF_abund_tab) == 0) {
    print("No samples in common. Skip the co-occurence network analysis.")
    return()
  }

  # Creating BF taxonomy tables. Remove the abundance tables first so that the
  # exists() check below really tests whether the taxonomy files were read
  # (otherwise the leftover abundance tables would always satisfy it).
  rm(tab_16S, tab_ITS)
  path_16S <- paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", compartment, "_16S.RDS")
  if (file.exists(path_16S)) tab_16S <- readRDS(file = path_16S)
  path_ITS <- paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", compartment, "_ITS.RDS")
  if (file.exists(path_ITS)) tab_ITS <- readRDS(file = path_ITS)

  if (!exists("tab_16S", inherits = FALSE) || !exists("tab_ITS", inherits = FALSE)) {
    print("One of the dataframe does not exist. Skip the co-occurence network analysis.")
    return()
  }

  BF_tab <- bind_rows(tab_16S, tab_ITS)
  BF_tab$ASV_ID <- row.names(BF_tab)

  # Making all files consistent (tidy_dataset() aligns OTU and taxonomy tables)
  library(microeco)
  DS <- microtable$new(
    otu_table = BF_abund_tab,
    tax_table = BF_tab
  )
  print(DS)
  DS$tidy_dataset()
  print(DS)

  # Saving tables (abundance transposed: samples in rows for corr.test())
  BF_abund <- data.frame(t(DS$otu_table))
  saveRDS(BF_abund, file = paste0("7_editing_figures/co-occurence_network/BF_tables/abund_tab_", subset, "_subset_", compartment, "_BF.RDS"))
  saveRDS(DS$tax_table, file = paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS"))
}

# Build the combined Bacteria x Fungi tables for each subset and compartment;
# "##" lines are the knitted console output of the two microtable prints.
BF.tab("Pg-spec", "leaves", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 63 rows and 8 columns
## tax_table have 100 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 63 rows and 8 columns
## tax_table have 63 rows and 8 columns
BF.tab("Pg-spec", "pulps", "ASV")
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 73 rows and 10 columns
## tax_table have 73 rows and 8 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 73 rows and 10 columns
## tax_table have 73 rows and 8 columns
BF.tab("Pg-spec", "seeds", "ASV")
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 5 rows and 1 columns
## tax_table have 28 rows and 8 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 5 rows and 1 columns
## tax_table have 5 rows and 8 columns
BF.tab("Pg-spec", "roots", "ASV")
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 616 rows and 16 columns
## tax_table have 616 rows and 8 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 616 rows and 16 columns
## tax_table have 616 rows and 8 columns
BF.tab("Pg-spec", "soil", "ASV")
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1151 rows and 32 columns
## tax_table have 1151 rows and 8 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1151 rows and 32 columns
## tax_table have 1151 rows and 8 columns
BF.tab("Ps-spec", "leaves", "ASV")
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 48 rows and 1 columns
## tax_table have 476 rows and 8 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 48 rows and 1 columns
## tax_table have 48 rows and 8 columns
BF.tab("Ps-spec", "pulps", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 96 rows and 8 columns
## tax_table have 96 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 96 rows and 8 columns
## tax_table have 96 rows and 8 columns
BF.tab("Ps-spec", "seeds", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 8 columns
BF.tab("Ps-spec", "roots", "ASV")
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 611 rows and 14 columns
## tax_table have 621 rows and 8 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 611 rows and 14 columns
## tax_table have 611 rows and 8 columns
BF.tab("Ps-spec", "soil", "ASV")
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1760 rows and 32 columns
## tax_table have 1760 rows and 8 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1760 rows and 32 columns
## tax_table have 1760 rows and 8 columns

Spearman correlation test

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation (checked inside `path`, where dir.create() creates it)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "spearman_correlation"
if (!file.exists(file.path(path, newfolder))) {
  dir.create(file.path(path, newfolder))
}

SpMan <- function(subset, compartment, type) {
  # Pairwise Spearman correlations (FDR-adjusted, via psych::corr.test) between
  # all ASVs of the combined Bacteria x Fungi abundance table of one
  # subset/compartment. The corr.test object is saved as an RDS file.
  #
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"

  library(vegan)
  library(psych)

  # Loading BF abundance table; skip cleanly when BF.tab() produced nothing
  in_path <- paste0("7_editing_figures/co-occurence_network/BF_tables/abund_tab_", subset, "_subset_", compartment, "_BF.RDS")
  if (!file.exists(in_path)) {
    print("No data available. Skip the co-occurence network analysis.")
    return()
  }
  df <- readRDS(in_path)

  # Correlations need at least two samples (rows)
  if (nrow(df) <= 1) {
    print("Not enough data to proceed the following steps.")
    return()
  }

  # Correlation test: Spearman rho, pairwise-complete, FDR adjustment
  spman.df <- corr.test(df, use = "pairwise", method = "spearman", adjust = "fdr", alpha = .05, ci = FALSE)

  # Saving results
  saveRDS(spman.df, paste0("7_editing_figures/co-occurence_network/spearman_correlation/SpMan_", subset, "_subset_", compartment, "_BF.RDS"))
}

# Spearman correlations per subset/compartment; the "NULL" lines in the knit
# output correspond to the early returns of the guard clauses.
SpMan("Pg-spec", "soil", "ASV")
SpMan("Pg-spec", "roots", "ASV")
SpMan("Pg-spec", "leaves", "ASV")
SpMan("Pg-spec", "pulps", "ASV")
SpMan("Pg-spec", "seeds", "ASV")
## [1] "Not enough data to proceed the following steps."
## NULL
SpMan("Ps-spec", "soil", "ASV")
SpMan("Ps-spec", "roots", "ASV")
SpMan("Ps-spec", "leaves", "ASV")
## [1] "Not enough data to proceed the following steps."
## NULL
SpMan("Ps-spec", "pulps", "ASV")
SpMan("Ps-spec", "seeds", "ASV")

Significant positive correlations

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "SpMan_Rsig"
if (!file.exists(file.path(path, newfolder))) {
  dir.create(file.path(path, newfolder))
}

r.cutoff <- 0.6 # Spearman correlation coefficient (rho) threshold — compared to corr.test()'s $r matrix, not an R-squared value
p.cutoff <- 0.05 # p-value threshold

SpMan.Rsig <- function(subset, compartment, type, r.cutoff = 0.6, p.cutoff = 0.05) {
  # Extract the significant positive Spearman correlations (the edges of the
  # co-occurrence network) and attach the taxonomy of both partner ASVs.
  # Saves the edge table under 7_editing_figures/co-occurence_network/SpMan_Rsig/.
  #
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"
  # r.cutoff = minimum Spearman rho kept (previously read from a global)
  # p.cutoff = maximum adjusted p-value kept (previously read from a global)

  # Loading corr.test results
  path <- paste0("7_editing_figures/co-occurence_network/spearman_correlation/SpMan_", subset, "_subset_", compartment, "_BF.RDS")
  if (file.exists(path)) {
    df <- readRDS(path)
  } else {
    print("No data available. Skip the co-occurence network analysis.")
    return()
  }

  Cor <- as.matrix(df$r)
  # Keep the upper triangular part of the correlation matrix (each pair once)
  Cor.df <- data.frame(
    row = rownames(Cor)[row(Cor)[upper.tri(Cor)]],
    col = colnames(Cor)[col(Cor)[upper.tri(Cor)]],
    Cor = Cor[upper.tri(Cor)]
  )

  # Same extraction for the adjusted p-value matrix
  P0 <- as.matrix(df$p)
  P.df <- data.frame(
    row = rownames(P0)[row(P0)[upper.tri(P0)]],
    col = colnames(P0)[col(P0)[upper.tri(P0)]],
    p = P0[upper.tri(P0)]
  )

  df2 <- data.frame(Cor.df, P.df, Subset = subset, Compartment = compartment, Cross = "BF")
  # Keep data according to p-value and correlation thresholds
  # (the unused df.sig alias from the original has been removed)
  da.tmp <- df2[df2$Cor > r.cutoff & df2$p < p.cutoff, ]
  V1 <- data.frame("v1" = da.tmp$row)
  V2 <- data.frame("v2" = da.tmp$col)

  # Loading taxonomy table
  ID.tmp <- readRDS(paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS"))

  IDsub1 <- ID.tmp[ID.tmp$ASV_ID %in% V1$v1, ]
  IDsub2 <- ID.tmp[ID.tmp$ASV_ID %in% V2$v2, ]
  # seq_len() is safe when no correlation passes the thresholds (0 rows);
  # 1:nrow(V1) would produce c(1, 0) and fail in that case.
  V1$id <- seq_len(nrow(V1))
  V2$id <- seq_len(nrow(V2))

  # Re-attach taxonomy to each edge endpoint, preserving the edge order via id
  M1 <- merge(V1, IDsub1, by.x = "v1", by.y = "ASV_ID", all.x = T)
  M1 <- M1[order(M1$id), ]
  M2 <- merge(V2, IDsub2, by.x = "v2", by.y = "ASV_ID", all.x = T)
  M2 <- M2[order(M2$id), ]
  df.tmp <- data.frame(da.tmp, M1, M2)


  # Saving the significant correlations
  saveRDS(df.tmp, paste0("7_editing_figures/co-occurence_network/SpMan_Rsig/SpMan_Rsig_", subset, "_subset_", compartment, "_BF.RDS"))
}

# Extract the significant positive correlations for every subset x
# compartment combination. The "## ..." lines record the output of the
# original knitted run: combinations whose correlation RDS was never
# produced (too few samples upstream) are skipped and return NULL.
SpMan.Rsig("Pg-spec", "soil", "ASV")
SpMan.Rsig("Pg-spec", "roots", "ASV")
SpMan.Rsig("Pg-spec", "leaves", "ASV")
SpMan.Rsig("Pg-spec", "pulps", "ASV")
SpMan.Rsig("Pg-spec", "seeds", "ASV")
## [1] "No data available. Skip the co-occurence network analysis."
## NULL
SpMan.Rsig("Ps-spec", "soil", "ASV")
SpMan.Rsig("Ps-spec", "roots", "ASV")
SpMan.Rsig("Ps-spec", "leaves", "ASV")
## [1] "No data available. Skip the co-occurence network analysis."
## NULL
SpMan.Rsig("Ps-spec", "pulps", "ASV")
SpMan.Rsig("Ps-spec", "seeds", "ASV")

Plotting co-occurrence network: Bacteria x Fungi (ASV)

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Out folder creation: the SpMan_network folder and its three sub-folders.
# dir.exists() is the correct predicate for directories (file.exists() would
# also be TRUE for a regular file of the same name).
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
if (!dir.exists(file.path(path, "SpMan_network"))) {
  dir.create(file.path(path, "SpMan_network"))
}

path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network/SpMan_network"
for (newfolder in c("property", "module_details", "figures")) {
  if (!dir.exists(file.path(path, newfolder))) {
    dir.create(file.path(path, newfolder))
  }
}

bac.fung.cross.net <- function(subset, compartment, type, edges) {
  # Build, characterise and plot the bacteria x fungi co-occurrence network
  # for one subset/compartment, from the significant Spearman correlations
  # saved by SpMan.Rsig(). Side effects: saves a network-property RDS, an
  # SVG figure, and a tab-separated module-membership table; also plots the
  # graph to the active device. Returns NULL.
  #
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"
  # edges = "B" (links between bacterial ASVs), "F" (links between fungal ASVs), "BF" (links between bacterial and fungal ASVs) or "all" (all links)

  library(igraph)
  library(graphics)

  # Loading data: taxonomy table for this subset/compartment
  path <- paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS")
  if (file.exists(path)) {
    ID.tmp <- readRDS(path)
  } else {
    print("No data available. Skip the co-occurence network analysis.")
    return()
  }

  # Significant positive correlations produced by SpMan.Rsig()
  path <- paste0("7_editing_figures/co-occurence_network/SpMan_Rsig/SpMan_Rsig_", subset, "_subset_", compartment, "_BF.RDS")
  if (file.exists(path)) {
    da <- readRDS(path)
  } else {
    print("Not enough data. Skip the co-occurence network analysis.")
    return()
  }

  # Creating graph: first two columns of da (ASV pair) become the edge list
  # NOTE(review): graph.data.frame is a deprecated igraph alias of
  # graph_from_data_frame — consider updating.
  g <- graph.data.frame(da, directed = FALSE)

  # Color edition: colour each node by kingdom (bacteria vs fungi)
  g.color <- droplevels(ID.tmp[ID.tmp$ASV_ID %in% V(g)$name, ]) # Colour filtering from ID.tmp
  # Align taxonomy rows with the graph's vertex order
  g.color <- g.color[match(V(g)$name, g.color$ASV_ID), ]
  # NOTE(review): factor(..., labels = c("black", "#D13C32")) assumes exactly
  # two kingdom levels are present; it errors if only one kingdom remains
  # after filtering — confirm this cannot happen for the inputs used here.
  g.color$Kingdom <- factor(g.color$Kingdom, labels = c("black", "#D13C32"))
  # Redundant with the labels= argument above, kept as in the original
  levels(g.color$Kingdom) <- c("black", "#D13C32")

  V(g)$color <- as.character(g.color$Kingdom) # Assigning colours to graph nodes

  # Global network properties (several igraph calls below use deprecated
  # aliases: average.path.length, no.clusters, get.edgelist,
  # centralization.* — consider the modern mean_distance / count_components /
  # as_edgelist / centr_* equivalents)
  num.edges <- length(E(g))
  num.vertices <- length(V(g))
  connectance <- edge_density(g, loops = FALSE)
  average.degree <- mean(igraph::degree(g))
  average.path.length <- average.path.length(g)
  diameter <- diameter(g, directed = FALSE, unconnected = TRUE, weights = NULL)
  edge.connectivity <- edge_connectivity(g)
  clustering.coefficient <- transitivity(g)
  no.clusters <- no.clusters(g)
  centralization.betweenness <- centralization.betweenness(g)$centralization
  centralization.degree <- centralization.degree(g)$centralization
  fun.fc <- cluster_fast_greedy(g)
  Modularity <- modularity(fun.fc, membership(fun.fc))
  No.modules <- nrow(data.frame(sizes(fun.fc)))

  # Summary table of graph properties
  df.tmp <- data.frame(
    network = "BF-FF-BB", cross = "Bacteria - Fungi", subset, compartment,
    num.edges, num.vertices, connectance, average.degree,
    average.path.length, diameter, edge.connectivity,
    clustering.coefficient, no.clusters,
    centralization.betweenness, centralization.degree,
    Modularity, No.modules
  )

  # Saving the summary table
  saveRDS(df.tmp, paste0("7_editing_figures/co-occurence_network/SpMan_network/property/SpMan_network_property_BF-BB-FF_", subset, "_subset_", compartment, "_BF.RDS"))

  # Edge list as a two-column data frame (one row per edge)
  g.E <- data.frame(get.edgelist(g))
  names(g.E) <- c("V1", "V2")

  V1 <- data.frame("v1" = g.E$V1)
  V2 <- data.frame("v2" = g.E$V2)

  # Attach the kingdom of each edge endpoint; the id column preserves the
  # original edge order, which merge() would otherwise scramble
  ID.tmpx <- ID.tmp[, c("ASV_ID", "Kingdom")]
  IDsub1 <- ID.tmpx[ID.tmpx$ASV_ID %in% V1$v1, ]
  IDsub2 <- ID.tmpx[ID.tmpx$ASV_ID %in% V2$v2, ]
  V1$id <- 1:nrow(V1)
  V2$id <- 1:nrow(V2)

  M1 <- merge(V1, IDsub1, by.x = "v1", by.y = "ASV_ID", all.x = T)
  M1 <- M1[order(M1$id), ]
  M2 <- merge(V2, IDsub2, by.x = "v2", by.y = "ASV_ID", all.x = T)
  M2 <- M2[order(M2$id), ]

  # Colour edges by the kingdoms they connect, per the edges argument.
  # NOTE(review): for edges = "B"/"F"/"BF", edges not matching the condition
  # keep an NA colour — presumably intentional (they are hidden), but verify.
  if (edges == "B") {
    M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Bacteria"] <- "salmon1"
  } else if (edges == "F") {
    M1$BF[M1$Kingdom == "k__Fungi" & M2$Kingdom == "k__Fungi"] <- "paleturquoise3"
  } else if (edges == "BF") {
    M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Fungi"] <- "grey"
  } else if (edges == "all") {
    M1$BF <- "#8FB08A" # default: mixed bacteria-fungi links
    M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Bacteria"] <- "black"
    M1$BF[M1$Kingdom == "k__Fungi" & M2$Kingdom == "k__Fungi"] <- "#FF7F7F"
  }

  E(g)$color <- as.character(M1$BF)

  # Plotting graph: once to an SVG file, once to the active device
  set.seed(123) # fixed seed so the layout is reproducible
  svg(paste0("7_editing_figures/co-occurence_network/SpMan_network/figures/", subset, "_subset_", compartment, "_", edges, ".svg"), width = 5, height = 5)
  plot(g, edge.width = 1.5, vertex.frame.color = NA, vertex.label = NA, edge.lty = 1.5, edge.curved = T, vertex.size = 5, vertex.shape = "circle")
  dev.off()
  plot(g, edge.width = 1.5, vertex.frame.color = NA, vertex.label = NA, edge.lty = 1.5, edge.curved = T, vertex.size = 3.5, vertex.shape = "circle")

  # Gathering information on modules (fast-greedy community detection)
  mod_list <- cluster_fast_greedy(g) # List of modules on the graph
  # NOTE(review): this table() result is discarded (no print/assignment)
  table(membership(mod_list)) # Number of nodes per graph module

  # Extract information on module membership
  module_membership <- as.integer(membership(mod_list))

  # Create a dataframe with module membership information and node names,
  # then join the full taxonomy; merge puts the key (ASV name) first, so the
  # names below follow the merged column order
  module_df <- data.frame(V1 = module_membership, V2 = V(g)$name)
  module_df <- merge(module_df, ID.tmp, by.x = "V2", by.y = "ASV_ID")
  names(module_df) <- c(
    "ASV_ID",
    "Module",
    "Kingdom",
    "Phylum",
    "Class",
    "Order",
    "Family",
    "Genus",
    "Species"
  )

  # Reorder columns: module first, then ASV and taxonomy
  library(dplyr)
  module_df <- module_df %>% dplyr::select(
    "Module",
    "ASV_ID",
    "Kingdom",
    "Phylum",
    "Class",
    "Order",
    "Family",
    "Genus",
    "Species"
  )

  # Saving table
  write.table(module_df,
    file = paste0("7_editing_figures/co-occurence_network/SpMan_network/module_details/", subset, "_subset_", compartment, "_BF.txt"),
    sep = "\t",
    row.names = FALSE
  )
}

# Execute the function with different parameters: build the full ("all"
# edges) bacteria x fungi co-occurrence network for each subset and
# compartment. The "## ..." lines record the output of the original
# knitted run; combinations with no significant-correlation file are
# skipped and return NULL.
bac.fung.cross.net("Pg-spec", "seeds", "ASV", "all")
## [1] "Not enough data. Skip the co-occurence network analysis."
## NULL
bac.fung.cross.net("Ps-spec", "seeds", "ASV", "all")

bac.fung.cross.net("Pg-spec", "pulps", "ASV", "all")

bac.fung.cross.net("Ps-spec", "pulps", "ASV", "all")

bac.fung.cross.net("Pg-spec", "leaves", "ASV", "all")

bac.fung.cross.net("Ps-spec", "leaves", "ASV", "all")
## [1] "Not enough data. Skip the co-occurence network analysis."
## NULL
bac.fung.cross.net("Pg-spec", "roots", "ASV", "all")

bac.fung.cross.net("Ps-spec", "roots", "ASV", "all")

bac.fung.cross.net("Pg-spec", "soil", "ASV", "all")

bac.fung.cross.net("Ps-spec", "soil", "ASV", "all")

Graph and module information

rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory

# Directory holding the per-network property tables saved as .RDS files
path <- "7_editing_figures/co-occurence_network/SpMan_network/property"

# Collect every .RDS file found in that directory
files <- list.files(path = path, pattern = "\\.RDS$", full.names = TRUE)

# Read each file into a named list entry (name = file name without extension)
list_df <- lapply(files, readRDS)
names(list_df) <- tools::file_path_sans_ext(basename(files))

# Stack all property tables into a single data frame
df <- do.call(rbind, list_df)
colnames(df) <- colnames(list_df[[1]]) # Assign column names

# Save dataframe
write.table(df,
  file = "7_editing_figures/co-occurence_network/SpMan_network/property/network_statistics.txt",
  sep = "\t",
  row.names = FALSE
)

# Display table
library(DT)
print(datatable(df, width = NULL, height = NULL, caption = "Network statistics"))

show.tab <- function(type, subset, compartment) {
  # Render the saved module-membership table for one subset/compartment as an
  # interactive DT datatable. Prints a notice and returns NULL when the table
  # was never produced (network step skipped upstream).
  #
  # subset = "Pg" or "Ps"
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"

  # Path of the module table written by bac.fung.cross.net()
  tab.path <- paste0("7_editing_figures/co-occurence_network/SpMan_network/module_details/", subset, "_subset_", compartment, "_BF.txt")
  if (!file.exists(tab.path)) {
    # NOTE(review): "avalaible" is a typo, kept verbatim so it matches the
    # recorded run output below — fix both together if desired.
    print("No avalaible data.")
    return()
  }

  tab <- read.table(
    file = tab.path,
    sep = "\t",
    header = TRUE
  )

  # Display table
  library(DT)
  print(datatable(tab,
    width = NULL, height = NULL, rownames = FALSE,
    caption = paste0(subset, "_", compartment, " (", type, ")")
  ))
}

# Display the module-membership tables for every subset x compartment.
# The "## ..." lines record the output of the original knitted run:
# combinations whose module table was never written are skipped.
show.tab("ASV", "Pg-spec", "seeds")
## [1] "No avalaible data."
## NULL
show.tab("ASV", "Ps-spec", "seeds")

show.tab("ASV", "Pg-spec", "pulps")
show.tab("ASV", "Ps-spec", "pulps")

show.tab("ASV", "Pg-spec", "leaves")
show.tab("ASV", "Ps-spec", "leaves")
## [1] "No avalaible data."
## NULL
show.tab("ASV", "Pg-spec", "roots")
show.tab("ASV", "Ps-spec", "roots")

show.tab("ASV", "Pg-spec", "soil")
show.tab("ASV", "Ps-spec", "soil")